{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.2解析真实地址抓取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-04T12:34:32.289444Z",
     "start_time": "2019-01-04T12:34:31.643435Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/**/ typeof jQuery112403473268296510956_1531502963311 === 'function' && jQuery112403473268296510956_1531502963311({\"results\":{\"parents\":[{\"replySeq\":36631816,\"name\":\"用户6190237002\",\"memberId\":\"6190237002\",\"memberIcon\":\"http://tvax3.sinaimg.cn/default/images/default_avatar_male_50.gif\",\"memberUrl\":\"https://weibo.com/u/6190237002\",\"memberDomain\":\"weibo_sina\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36631816,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"120.236.177.107\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.80 Safari/537.36\",\"septSns\":\"weibo_sina\",\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-12-27T15:10:40.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"我来评论一下\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29677750,\"memberSeq\":30197150,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36592594,\"name\":\"冯渊\",\"memberId\":\"oBVoaxBdFYWhpbVVXVHJpEo64eM0\",\"memberIcon\":\"http://thirdwx.qlogo.cn/mmopen/vi_32/aUjsNaevskersGxDbTvOgcfXV4IA2s1HITJQ7CiaswkrKMTY2XWpv3BibA103N3wjFDiaBy9oLeseNocCsJcxsEkA/132\",\"memberUrl\":\"http://www.wechat.com\",\"memberDomain\":\"wechat\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36592594,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"183.17.235.171\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-12-20T11:55:46.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"这是一条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29636601,\"memberSeq\":30155590,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36591166,\"name\":\"奶头乐\",\"memberId\":\"UID_06CAE7F02FA93E0B923D784443B928A4\",\"memberIcon\":\"http://thirdqq.qlogo.cn/qqapp/101256433/22388AFF7A12D6DF455EDAC048BE4DD4/100\",\"memberUrl\":\"https://qq.com/\",\"memberDomain\":\"qq\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36591166,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"45.78.11.91\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-12-20T07:47:04.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"这是一条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29635197,\"memberSeq\":30154167,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36560100,\"name\":\"aaa\",\"memberId\":\"adsad@qq.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_1.png\",\"memberUrl\":null,\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36560100,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"60.12.210.98\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-12-14T13:10:13.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"csass\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":0,\"memberSeq\":0,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36520275,\"name\":\"Dove丶\",\"memberId\":\"oBVoaxGLoY0_4Rgn26uY09JgQc-o\",\"memberIcon\":\"http://thirdwx.qlogo.cn/mmopen/vi_32/InZgCgEZVcBmkLM9uKuPibduzXDhNLLNanXvC7yGZSOib7Hf7TM9TZHB9Ck0l5CEDJGSpPT5CBkhcFDjI1Oic7hrg/132\",\"memberUrl\":\"http://www.wechat.com\",\"memberDomain\":\"wechat\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36520275,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"183.6.46.31\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-12-08T09:36:27.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"这是一条测试评论\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29565099,\"memberSeq\":30083386,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36486898,\"name\":\"panda\",\"memberId\":\"oBVoaxBt9wMk7Oy9WZrrr1pyxAzc\",\"memberIcon\":\"http://thirdwx.qlogo.cn/mmopen/vi_32/Q0j4TwGTfTJouAWB6JnHC3hw4nDs3ZS3pYia9BvkEbQnqMOj20ST47yK5hbk9yEuRHNlAS5iaWc5Z3R8DIZpQV6A/132\",\"memberUrl\":\"http://www.wechat.com\",\"memberDomain\":\"wechat\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36486898,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"115.194.181.178\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-12-04T02:43:38.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"熊猫的评论 是第58条\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29542892,\"memberSeq\":30060997,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36447705,\"name\":\"梁，\",\"memberId\":\"UID_C9E9555F532A5ADD2287A8EED1A14C47\",\"memberIcon\":\"http://thirdqq.qlogo.cn/qqapp/101256433/4BC89E50547C352218D2B107304DC5F5/100\",\"memberUrl\":\"https://qq.com/\",\"memberDomain\":\"qq\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36447705,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"218.240.144.242\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-11-27T08:15:45.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"评论试试啊 :smiley:\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29506926,\"memberSeq\":30024548,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36441741,\"name\":\"计科 执念\",\"memberId\":\"UID_1BA00BC28573BCBB7139AD0C5A1BC824\",\"memberIcon\":\"http://thirdqq.qlogo.cn/qqapp/101256433/EC4BB263445DB8CBBE817EA127856901/100\",\"memberUrl\":\"https://qq.com/\",\"memberDomain\":\"qq\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36441741,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"36.7.131.147\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-11-26T10:48:43.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"121212\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29502033,\"memberSeq\":30019602,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36441739,\"name\":\"计科 执念\",\"memberId\":\"UID_1BA00BC28573BCBB7139AD0C5A1BC824\",\"memberIcon\":\"http://thirdqq.qlogo.cn/qqapp/101256433/EC4BB263445DB8CBBE817EA127856901/100\",\"memberUrl\":\"https://qq.com/\",\"memberDomain\":\"qq\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36441739,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"36.7.131.147\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-11-26T10:48:37.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"1212\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":29502033,\"memberSeq\":30019602,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null},{\"replySeq\":36366311,\"name\":\"sss\",\"memberId\":\"368785@qq.com\",\"memberIcon\":\"https://cdn-city.livere.com/images/user_profile_1.png\",\"memberUrl\":null,\"memberDomain\":\"livere\",\"good\":0,\"bad\":0,\"police\":0,\"parentSeq\":36366311,\"directSeq\":0,\"shortUrl\":null,\"title\":\"Hello world!\",\"site\":\"http://www.santostang.com/2018/07/04/hello-world/\",\"email\":null,\"ipAddress\":\"119.39.18.225\",\"isMobile\":\"0\",\"agent\":\"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36\",\"septSns\":null,\"targetService\":null,\"targetUserName\":null,\"info1\":null,\"info2\":null,\"info3\":null,\"image1\":null,\"image2\":null,\"image3\":null,\"link1\":null,\"link2\":null,\"link3\":null,\"isSecret\":0,\"isModified\":0,\"confirm\":0,\"subCount\":0,\"regdate\":\"2018-11-13T15:16:08.000Z\",\"deletedDate\":null,\"file1\":null,\"file2\":null,\"file3\":null,\"additionalSeq\":0,\"content\":\"chrom反应慢，看得见这些评论，我就是用的chrom,还有4.3.1，没有按照唐大师说的，只要下载好解压到scripts中，就可以了，按照网上其他帖子可以打开firefox浏览器\",\"quotationSeq\":null,\"quotationContent\":null,\"consumerSeq\":1020,\"livereSeq\":28583,\"repSeq\":4272904,\"memberGroupSeq\":0,\"memberSeq\":0,\"status\":0,\"repGroupSeq\":0,\"adminSeq\":25413747,\"deleteReason\":null,\"sticker\":0,\"version\":null}],\"children\":[],\"quotations\":[]},\"resultCode\":200,\"resultMessage\":\"Okay, livere\"});\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "\n",
    "link = \"\"\"https://api-zero.livere.com/v1/comments/list?callback=jQuery112403473268296510956_1531502963311&limit=10&repSeq=4272904&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1531502963313\"\"\"\n",
    "\n",
    "headers = {'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'} \n",
    "\n",
    "r = requests.get(link, headers= headers)\n",
    "print (r.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-04T12:34:40.357672Z",
     "start_time": "2019-01-04T12:34:40.355165Z"
    }
   },
   "outputs": [],
   "source": [
    "# 获取 json 的 string\n",
    "json_string = r.text\n",
    "json_string = json_string[json_string.find('{'):-2] \n",
    "# 从第一个左大括号提取，最后的两个字符 - 括号和分号不取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-24T16:09:00.500018Z",
     "start_time": "2017-09-24T16:09:00.492953Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "第21条测试评论\n",
      "第20条测试评论\n",
      "第19条测试评论\n",
      "第18条测试评论\n",
      "第17条测试评论\n",
      "第16条测试评论\n",
      "第15条测试评论\n",
      "第14条测试评论\n",
      "第13条测试评论\n",
      "第12条测试评论\n"
     ]
    }
   ],
   "source": [
    "import json\n",
    "json_data = json.loads(json_string)\n",
    "comment_list = json_data['results']['parents']\n",
    "\n",
    "for eachone in comment_list:\n",
    "    message = eachone['content']\n",
    "    print (message)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-24T16:22:43.422456Z",
     "start_time": "2017-09-24T16:22:39.406885Z"
    },
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=1&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\n",
      "第21条测试评论\n",
      "第20条测试评论\n",
      "第19条测试评论\n",
      "第18条测试评论\n",
      "第17条测试评论\n",
      "第16条测试评论\n",
      "第15条测试评论\n",
      "第14条测试评论\n",
      "第13条测试评论\n",
      "第12条测试评论\n",
      "https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=2&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\n",
      "第11条测试评论\n",
      "第10条测试评论\n",
      "第9条测试评论\n",
      "第8条测试评论\n",
      "第7条测试评论\n",
      "第6条测试评论\n",
      "第5条测试评论\n",
      "第4条测试评论\n",
      "第3条测试评论\n",
      "第二条测试评论\n",
      "https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=3&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\n",
      "第一条测试评论\n"
     ]
    }
   ],
   "source": [
    "import requests\n",
    "import json\n",
    "\n",
    "def single_page_comment(link):\n",
    "    headers = {'User-Agent' : 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6'} \n",
    "    r = requests.get(link, headers= headers)\n",
    "    # 获取 json 的 string\n",
    "    json_string = r.text\n",
    "    json_string = json_string[json_string.find('{'):-2]\n",
    "    json_data = json.loads(json_string)\n",
    "    comment_list = json_data['results']['parents']\n",
    "    \n",
    "    for eachone in comment_list:\n",
    "        message = eachone['content']\n",
    "        print (message)\n",
    "\n",
    "for page in range(1,4):\n",
    "    link1 = \"https://api-zero.livere.com/v1/comments/list?callback=jQuery112407875296433383039_1506267778283&limit=10&offset=\"\n",
    "    link2 = \"&repSeq=3871836&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&_=1506267778285\"\n",
    "    page_str = str(page)\n",
    "    link = link1 + page_str + link2\n",
    "    print (link)\n",
    "    single_page_comment(link)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 4.3通过 selenium 模拟浏览器抓取"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.3.1 selenium 的安装与基本介绍"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2017-09-24T16:24:30.561998Z",
     "start_time": "2017-09-24T16:24:27.616197Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting selenium\n",
      "  Downloading selenium-3.5.0-py2.py3-none-any.whl (921kB)\n",
      "\u001b[K    100% |████████████████████████████████| 921kB 579kB/s ta 0:00:01\n",
      "\u001b[?25hInstalling collected packages: selenium\n",
      "Successfully installed selenium-3.5.0\n"
     ]
    }
   ],
   "source": [
    "! pip install selenium"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-04T12:37:53.981066Z",
     "start_time": "2019-01-04T12:37:42.785103Z"
    },
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# 以下这段会报错\n",
    "from selenium import webdriver\n",
    "driver = webdriver.Firefox()\n",
    "driver.get(\"http://www.santostang.com/2018/07/04/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2019-01-04T12:41:40.590180Z",
     "start_time": "2019-01-04T12:41:31.216257Z"
    }
   },
   "outputs": [],
   "source": [
    "from selenium import webdriver\n",
    "\n",
    "driver = webdriver.Firefox(executable_path = r'C:\\Users\\santostang\\Desktop\\geckodriver.exe')\n",
    "#把上述地址改成你电脑中geckodriver.exe程序的地址\n",
    "driver.get(\"http://www.santostang.com/2018/07/04/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 以下这段会报错\n",
    "comment = driver.find_element_by_css_selector('div.reply-content')\n",
    "content = comment.find_element_by_tag_name('p')\n",
    "print (content.text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-13T15:37:07.605184Z",
     "start_time": "2018-11-13T15:37:07.573097Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chrom反应慢，看得见这些评论，我就是用的chrom,还有4.3.1，没有按照唐大师说的，只要下载好解压到scripts中，就可以了，按照网上其他帖子可以打开firefox浏览器\n"
     ]
    }
   ],
   "source": [
    "driver.switch_to.frame(driver.find_element_by_css_selector(\"iframe[title='livere']\"))\n",
    "comment = driver.find_element_by_css_selector('div.reply-content')\n",
    "content = comment.find_element_by_tag_name('p')\n",
    "print (content.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.3.3 selenium获取文章的所有评论"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-14T14:44:29.067359Z",
     "start_time": "2018-11-14T14:44:08.050039Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "chrom反应慢，看得见这些评论，我就是用的chrom,还有4.3.1，没有按照唐大师说的，只要下载好解压到scripts中，就可以了，按照网上其他帖子可以打开firefox浏览器\n",
      "wordpress怎样用啊\n",
      "4.1 动态网页抓取 (解析真实地址 + selenium) 链接失效了\n",
      "刘一凡\n",
      "后边的学习不了了吗\n",
      "为什么用Chrome就看不见这些评论啊\n",
      "我发现只要点4.1 4.2 就炸\n",
      "崩了？\n",
      "为啥就是不能加载更多啊！！！都两天了！！！\n",
      "666\n",
      "余大大\n",
      "坚持\n",
      "255\n",
      "nice to meet you\n",
      "lla\n",
      "点赞！\n",
      "test\n",
      "测试终止\n",
      "测试测试1212！\n",
      "测试\n",
      "测试凭证\n",
      "11\n",
      "测试\n",
      "有这本书的学习交流群吗？\n",
      "第四章问题好多，代码各种错误，求更新\n",
      "测试\n",
      "学习一下\n",
      "读者吴先生到此一游。\n",
      "第21条测试评论\n",
      "第20条测试评论\n",
      "第19条测试评论\n",
      "第18条测试评论\n",
      "第17条测试评论\n",
      "第16条测试评论\n",
      "第15条测试评论\n",
      "第14条测试评论\n",
      "第13条测试评论\n",
      "第12条测试评论\n",
      "第11条测试评论\n",
      "第10条测试评论\n"
     ]
    }
   ],
   "source": [
    "from selenium import webdriver\n",
    "import time\n",
    "\n",
    "driver = webdriver.Firefox(executable_path = r'C:\\Users\\santostang\\Desktop\\geckodriver.exe')\n",
    "driver.implicitly_wait(20) # 隐性等待，最长等20秒\n",
    "#把上述地址改成你电脑中geckodriver.exe程序的地址\n",
    "driver.get(\"http://www.santostang.com/2018/07/04/hello-world/\")\n",
    "time.sleep(5)\n",
    "\n",
    "for i in range(0,3):\n",
    "    # 下滑到页面底部\n",
    "    driver.execute_script(\"window.scrollTo(0, document.body.scrollHeight);\")\n",
    "    # 转换iframe，再找到查看更多，点击\n",
    "    driver.switch_to.frame(driver.find_element_by_css_selector(\"iframe[title='livere']\"))\n",
    "    load_more = driver.find_element_by_css_selector('button.more-btn')\n",
    "    load_more.click()\n",
    "    # 把iframe又转回去\n",
    "    driver.switch_to.default_content()\n",
    "    time.sleep(2)\n",
    "\n",
    "driver.switch_to.frame(driver.find_element_by_css_selector(\"iframe[title='livere']\"))\n",
    "comments = driver.find_elements_by_css_selector('div.reply-content')\n",
    "for eachcomment in comments:\n",
    "    content = eachcomment.find_element_by_tag_name('p')\n",
    "    print (content.text)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.3.4\tSelenium的高级操作"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-14T15:46:42.863163Z",
     "start_time": "2018-11-14T15:46:32.714497Z"
    }
   },
   "outputs": [],
   "source": [
    "# 控制 css\n",
    "from selenium import webdriver\n",
    "\n",
    "fp = webdriver.FirefoxProfile()\n",
    "fp.set_preference(\"permissions.default.stylesheet\",2)\n",
    "\n",
    "driver = webdriver.Firefox(firefox_profile=fp, executable_path = r'C:\\Users\\santostang\\Desktop\\geckodriver.exe')\n",
    "#把上述地址改成你电脑中geckodriver.exe程序的地址\n",
    "driver.get(\"http://www.santostang.com/2018/07/04/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-14T15:52:42.493635Z",
     "start_time": "2018-11-14T15:52:33.174633Z"
    }
   },
   "outputs": [],
   "source": [
    "# 限制图片的加载\n",
    "from selenium import webdriver\n",
    "\n",
    "fp = webdriver.FirefoxProfile()\n",
    "fp.set_preference(\"permissions.default.image\",2)\n",
    "\n",
    "driver = webdriver.Firefox(firefox_profile=fp, executable_path = r'C:\\Users\\santostang\\Desktop\\geckodriver.exe')\n",
    "#把上述地址改成你电脑中geckodriver.exe程序的地址\n",
    "driver.get(\"http://www.santostang.com/2018/07/04/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2018-11-14T15:54:30.364380Z",
     "start_time": "2018-11-14T15:54:19.127990Z"
    }
   },
   "outputs": [],
   "source": [
    "# 限制 JavaScript 的执行\n",
    "from selenium import webdriver\n",
    "\n",
    "fp = webdriver.FirefoxProfile()\n",
    "fp.set_preference(\"javascript.enabled\", False)\n",
    "\n",
    "driver = webdriver.Firefox(firefox_profile=fp, executable_path = r'C:\\Users\\santostang\\Desktop\\geckodriver.exe')\n",
    "#把上述地址改成你电脑中geckodriver.exe程序的地址\n",
    "driver.get(\"http://www.santostang.com/2018/07/04/hello-world/\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {
    "height": "117px",
    "width": "252px"
   },
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": "block",
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
